{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.autograd import Variable\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as F\n",
    "import matplotlib.pyplot as plt\n",
    "import random\n",
    "import numpy as np\n",
    "from collections import Counter\n",
    "import pickle\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* http://www.isca-speech.org/archive/Interspeech_2016/pdfs/1352.PDF\n",
    "* https://arxiv.org/pdf/1409.0473.pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "USE_CUDA = torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "USE_CUDA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def prepare_sequence(seq, to_ix):\n",
    "    idxs = list(map(lambda w: to_ix[w] if w in to_ix.keys() else to_ix[\"<UNK>\"], seq))\n",
    "    tensor = Variable(torch.LongTensor(idxs)).cuda() if USE_CUDA else Variable(torch.LongTensor(idxs))\n",
    "    return tensor\n",
    "\n",
    "\n",
    "flatten = lambda l: [item for sublist in l for item in sublist]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data load and Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = open(\"../dataset/atis-2.train.w-intent.iob\",\"r\").readlines()\n",
    "train = [t[:-1] for t in train]\n",
    "train = [[t.split(\"\\t\")[0].split(\" \"),t.split(\"\\t\")[1].split(\" \")[:-1],t.split(\"\\t\")[1].split(\" \")[-1]] for t in train]\n",
    "train = [[t[0][1:-1],t[1][1:],t[2]] for t in train]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "seq_in,seq_out, intent = list(zip(*train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "vocab = set(flatten(seq_in))\n",
    "slot_tag = set(flatten(seq_out))\n",
    "intent_tag = set(intent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "LENGTH=50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sin=[]\n",
    "sout=[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for i in range(len(seq_in)):\n",
    "    temp = seq_in[i]\n",
    "    if len(temp)<LENGTH:\n",
    "        temp.append('<EOS>')\n",
    "        while len(temp)<LENGTH:\n",
    "            temp.append('<PAD>')\n",
    "    else:\n",
    "        temp = temp[:LENGTH]\n",
    "        temp[-1]='<EOS>'\n",
    "    sin.append(temp)\n",
    "    \n",
    "    temp = seq_out[i]\n",
    "    if len(temp)<LENGTH:\n",
    "        while len(temp)<LENGTH:\n",
    "            temp.append('<PAD>')\n",
    "    else:\n",
    "        temp = temp[:LENGTH]\n",
    "        temp[-1]='<EOS>'\n",
    "    sout.append(temp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "word2index = {'<PAD>': 0, '<UNK>':1,'<SOS>':2,'<EOS>':3}\n",
    "for token in vocab:\n",
    "    if token not in word2index.keys():\n",
    "        word2index[token]=len(word2index)\n",
    "\n",
    "index2word = {v:k for k,v in word2index.items()}\n",
    "\n",
    "tag2index = {'<PAD>' : 0}\n",
    "for tag in slot_tag:\n",
    "    if tag not in tag2index.keys():\n",
    "        tag2index[tag] = len(tag2index)\n",
    "index2tag = {v:k for k,v in tag2index.items()}\n",
    "\n",
    "intent2index={}\n",
    "for ii in intent_tag:\n",
    "    if ii not in intent2index.keys():\n",
    "        intent2index[ii] = len(intent2index)\n",
    "index2intent = {v:k for k,v in intent2index.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = list(zip(sin,sout,intent))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'atis_flight'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train[0][2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_data=[]\n",
    "\n",
    "for tr in train:\n",
    "    \n",
    "    temp = prepare_sequence(tr[0],word2index)\n",
    "    temp = temp.view(1,-1)\n",
    "    \n",
    "    temp2 = prepare_sequence(tr[1],tag2index)\n",
    "    temp2 = temp2.view(1,-1)\n",
    "    \n",
    "    temp3 = Variable(torch.LongTensor([intent2index[tr[2]]])).cuda() if USE_CUDA else Variable(torch.LongTensor([intent2index[tr[2]]]))\n",
    "    \n",
    "    train_data.append((temp,temp2,temp3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def getBatch(batch_size,train_data):\n",
    "    random.shuffle(train_data)\n",
    "    sindex=0\n",
    "    eindex=batch_size\n",
    "    while eindex < len(train_data):\n",
    "        batch = train_data[sindex:eindex]\n",
    "        temp = eindex\n",
    "        eindex = eindex+batch_size\n",
    "        sindex = temp\n",
    "        \n",
    "        yield batch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Modeling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class Encoder(nn.Module):\n",
    "    def __init__(self, input_size,embedding_size, hidden_size,batch_size=16 ,n_layers=1):\n",
    "        super(Encoder, self).__init__()\n",
    "        \n",
    "        self.input_size = input_size\n",
    "        self.embedding_size = embedding_size\n",
    "        self.hidden_size = hidden_size\n",
    "        self.n_layers = n_layers\n",
    "        self.batch_size=batch_size\n",
    "        \n",
    "        self.embedding = nn.Embedding(input_size, embedding_size)\n",
    "        self.lstm = nn.LSTM(embedding_size, hidden_size, n_layers, batch_first=True,bidirectional=True)\n",
    "    \n",
    "    def init_weights(self):\n",
    "        self.embedding.weight.data.uniform_(-0.1, 0.1)\n",
    "        #self.lstm.weight.data.\n",
    "    \n",
    "    def init_hidden(self,input):\n",
    "        hidden = Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size))\n",
    "        context = Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size))\n",
    "        return (hidden,context)\n",
    "     \n",
    "    def forward(self, input,input_masking):\n",
    "        \"\"\"\n",
    "        input : B,T (LongTensor)\n",
    "        input_masking : B,T (PAD 마스킹한 ByteTensor)\n",
    "        \n",
    "        <PAD> 제외한 리얼 Context를 다시 만들어서 아웃풋으로\n",
    "        \"\"\"\n",
    "        \n",
    "        self.hidden = self.init_hidden(input)\n",
    "        \n",
    "        embedded = self.embedding(input)\n",
    "        output, self.hidden = self.lstm(embedded, self.hidden)\n",
    "        \n",
    "        real_context=[]\n",
    "        \n",
    "        for i,o in enumerate(output): # B,T,D\n",
    "            real_length = input_masking[i].data.tolist().count(0) # 실제 길이\n",
    "            real_context.append(o[real_length-1])\n",
    "            \n",
    "        return output, torch.cat(real_context).view(input.size(0),-1).unsqueeze(1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "class Decoder(nn.Module):\n",
    "    \n",
    "    def __init__(self,slot_size,intent_size,embedding_size,hidden_size,batch_size=16,n_layers=1,dropout_p=0.1):\n",
    "        super(Decoder, self).__init__()\n",
    "        \n",
    "        self.hidden_size = hidden_size\n",
    "        self.slot_size = slot_size\n",
    "        self.intent_size = intent_size\n",
    "        self.n_layers = n_layers\n",
    "        self.dropout_p = dropout_p\n",
    "        self.embedding_size = embedding_size\n",
    "        self.batch_size = batch_size\n",
    "\n",
    "        # Define the layers\n",
    "        self.embedding = nn.Embedding(self.slot_size, self.embedding_size) #TODO encoder와 공유하도록 하고 학습되지 않게..\n",
    "\n",
    "        #self.dropout = nn.Dropout(self.dropout_p)\n",
    "        self.lstm = nn.LSTM(self.embedding_size+self.hidden_size*2, self.hidden_size, self.n_layers, batch_first=True)\n",
    "        self.attn = nn.Linear(self.hidden_size,self.hidden_size) # Attention\n",
    "        self.slot_out = nn.Linear(self.hidden_size*2, self.slot_size)\n",
    "        self.intent_out = nn.Linear(self.hidden_size*2,self.intent_size)\n",
    "    \n",
    "    def init_weights(self):\n",
    "        self.embedding.weight.data.uniform_(-0.1, 0.1)\n",
    "        #self.out.bias.data.fill_(0)\n",
    "        #self.out.weight.data.uniform_(-0.1, 0.1)\n",
    "        #self.lstm.weight.data.\n",
    "    \n",
    "    def Attention(self, hidden, encoder_outputs, encoder_maskings):\n",
    "        \"\"\"\n",
    "        hidden : 1,B,D\n",
    "        encoder_outputs : B,T,D\n",
    "        encoder_maskings : B,T # ByteTensor\n",
    "        \"\"\"\n",
    "        \n",
    "        hidden = hidden.squeeze(0).unsqueeze(2)  # 히든 : (1,배치,차원) -> (배치,차원,1)\n",
    "        \n",
    "        batch_size = encoder_outputs.size(0) # B\n",
    "        max_len = encoder_outputs.size(1) # T\n",
    "        energies = self.attn(encoder_outputs.contiguous().view(batch_size*max_len,-1)) # B*T,D -> B*T,D\n",
    "        energies = energies.view(batch_size,max_len,-1) # B,T,D (배치,타임,차원)\n",
    "        attn_energies = energies.bmm(hidden).transpose(1,2) # B,T,D * B,D,1 --> B,1,T\n",
    "        attn_energies = attn_energies.squeeze(1).masked_fill(encoder_maskings,-1e12) # PAD masking\n",
    "        \n",
    "        alpha = F.softmax(attn_energies) # B,T\n",
    "        alpha = alpha.unsqueeze(1) # B,1,T\n",
    "        context = alpha.bmm(encoder_outputs) # B,1,T * B,T,D => B,1,D\n",
    "        \n",
    "        return context # B,1,D\n",
    "    \n",
    "    def init_hidden(self,input):\n",
    "        hidden = Variable(torch.zeros(self.n_layers*1, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2,input.size(0), self.hidden_size))\n",
    "        context = Variable(torch.zeros(self.n_layers*1, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size))\n",
    "        return (hidden,context)\n",
    "    \n",
    "    def forward(self, input,context,encoder_outputs,encoder_maskings,training=True):\n",
    "        \"\"\"\n",
    "        input : B,L(length)\n",
    "        enc_context : B,1,D\n",
    "        \"\"\"\n",
    "        # Get the embedding of the current input word\n",
    "        embedded = self.embedding(input)\n",
    "        hidden = self.init_hidden(input)\n",
    "        decode=[]\n",
    "        aligns = encoder_outputs.transpose(0,1)\n",
    "        length = encoder_outputs.size(1)\n",
    "        for i in range(length): # Input_sequence와 Output_sequence의 길이가 같기 때문..\n",
    "            aligned = aligns[i].unsqueeze(1)# B,1,D\n",
    "            _, hidden = self.lstm(torch.cat((embedded,context,aligned),2), hidden) # input, context, aligned encoder hidden, hidden\n",
    "            \n",
    "            # for Intent Detection\n",
    "            if i==0: \n",
    "                intent_hidden = hidden[0].clone() \n",
    "                intent_context = self.Attention(intent_hidden, encoder_outputs,encoder_maskings) \n",
    "                concated = torch.cat((intent_hidden,intent_context.transpose(0,1)),2) # 1,B,D\n",
    "                intent_score = self.intent_out(concated.squeeze(0)) # B,D\n",
    "\n",
    "            concated = torch.cat((hidden[0],context.transpose(0,1)),2)\n",
    "            score = self.slot_out(concated.squeeze(0))\n",
    "            softmaxed = F.log_softmax(score)\n",
    "            decode.append(softmaxed)\n",
    "            _,input = torch.max(softmaxed,1)\n",
    "            embedded = self.embedding(input.unsqueeze(1))\n",
    "            \n",
    "            # 그 다음 Context Vector를 Attention으로 계산\n",
    "            context = self.Attention(hidden[0], encoder_outputs,encoder_maskings) \n",
    "        # 요고 주의! time-step을 column-wise concat한 후, reshape!!\n",
    "        slot_scores = torch.cat(decode,1)\n",
    "        return slot_scores.view(input.size(0)*length,-1), intent_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "LEARNING_RATE=0.001\n",
    "EMBEDDING_SIZE=64\n",
    "HIDDEN_SIZE=64\n",
    "BATCH_SIZE=16\n",
    "LENGTH=50\n",
    "STEP_SIZE=10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "encoder = Encoder(len(word2index),EMBEDDING_SIZE,HIDDEN_SIZE)\n",
    "decoder = Decoder(len(tag2index),len(intent2index),len(tag2index)//3,HIDDEN_SIZE*2)\n",
    "if USE_CUDA:\n",
    "    encoder = encoder.cuda()\n",
    "    decoder = decoder.cuda()\n",
    "    \n",
    "encoder.init_weights()\n",
    "decoder.init_weights()\n",
    "\n",
    "loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)\n",
    "loss_function_2 = nn.CrossEntropyLoss()\n",
    "enc_optim= optim.Adam(encoder.parameters(), lr=LEARNING_RATE)\n",
    "dec_optim = optim.Adam(decoder.parameters(),lr=LEARNING_RATE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 0  epoch 0  :  7.8252\n",
      "Step 0  epoch 100  :  3.77393\n",
      "Step 0  epoch 200  :  2.56081\n",
      "Step 1  epoch 0  :  2.45676\n",
      "Step 1  epoch 100  :  2.1388\n",
      "Step 1  epoch 200  :  1.91471\n",
      "Step 2  epoch 0  :  1.62789\n",
      "Step 2  epoch 100  :  1.52514\n",
      "Step 2  epoch 200  :  1.4501\n",
      "Step 3  epoch 0  :  1.78647\n",
      "Step 3  epoch 100  :  1.07129\n",
      "Step 3  epoch 200  :  0.974154\n",
      "Step 4  epoch 0  :  0.512485\n",
      "Step 4  epoch 100  :  0.730378\n",
      "Step 4  epoch 200  :  0.703024\n",
      "Step 5  epoch 0  :  0.474473\n",
      "Step 5  epoch 100  :  0.548027\n",
      "Step 5  epoch 200  :  0.49901\n",
      "Step 6  epoch 0  :  0.486824\n",
      "Step 6  epoch 100  :  0.424438\n",
      "Step 6  epoch 200  :  0.395794\n",
      "Step 7  epoch 0  :  0.387563\n",
      "Step 7  epoch 100  :  0.339434\n",
      "Step 7  epoch 200  :  0.312062\n",
      "Step 8  epoch 0  :  0.137902\n",
      "Step 8  epoch 100  :  0.268969\n",
      "Step 8  epoch 200  :  0.256304\n",
      "Step 9  epoch 0  :  0.592923\n",
      "Step 9  epoch 100  :  0.239827\n",
      "Step 9  epoch 200  :  0.21194\n"
     ]
    }
   ],
   "source": [
    "for step in range(STEP_SIZE):\n",
    "    losses=[]\n",
    "    for i, batch in enumerate(getBatch(BATCH_SIZE,train_data)):\n",
    "        x,y_1,y_2 = zip(*batch)\n",
    "        x = torch.cat(x)\n",
    "        tag_target = torch.cat(y_1)\n",
    "        intent_target = torch.cat(y_2)\n",
    "        x_mask = torch.cat([Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))) for t in x]).view(BATCH_SIZE,-1)\n",
    "        y_1_mask = torch.cat([Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))) for t in tag_target]).view(BATCH_SIZE,-1)\n",
    " \n",
    "        encoder.zero_grad()\n",
    "        decoder.zero_grad()\n",
    "\n",
    "        output, hidden_c = encoder(x,x_mask)\n",
    "        start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*BATCH_SIZE])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<SOS>']]*BATCH_SIZE])).transpose(1,0)\n",
    "\n",
    "        tag_score, intent_score = decoder(start_decode,hidden_c,output,x_mask)\n",
    "\n",
    "        loss_1 = loss_function_1(tag_score,tag_target.view(-1))\n",
    "        loss_2 = loss_function_2(intent_score,intent_target)\n",
    "\n",
    "        loss = loss_1+loss_2\n",
    "        losses.append(loss.data.cpu().numpy()[0] if USE_CUDA else loss.data.numpy()[0])\n",
    "        loss.backward()\n",
    "\n",
    "        torch.nn.utils.clip_grad_norm(encoder.parameters(), 5.0)\n",
    "        torch.nn.utils.clip_grad_norm(decoder.parameters(), 5.0)\n",
    "\n",
    "        enc_optim.step()\n",
    "        dec_optim.step()\n",
    "\n",
    "        if i % 100==0:\n",
    "            print(\"Step\",step,\" epoch\",i,\" : \",np.mean(losses))\n",
    "            losses=[]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from data import *\n",
    "from model import Encoder,Decoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processed_data_path : /home/dsksd-server/dsksd/RNN-for-Joint-NLU/data/\n"
     ]
    }
   ],
   "source": [
    "_,word2index,tag2index,intent2index = preprocessing('../dataset/corpus/atis-2.train.w-intent.iob',60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "index2tag = {v:k for k,v in tag2index.items()}\n",
    "index2intent = {v:k for k,v in intent2index.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "encoder = Encoder(len(word2index),64,64)\n",
    "decoder = Decoder(len(tag2index),len(intent2index),len(tag2index)//3,64*2)\n",
    "\n",
    "encoder.load_state_dict(torch.load('models/jointnlu-encoder.pkl'))\n",
    "decoder.load_state_dict(torch.load('models/jointnlu-decoder.pkl'))\n",
    "if USE_CUDA:\n",
    "    encoder = encoder.cuda()\n",
    "    decoder = decoder.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test = open(\"../dataset/corpus/atis-2.dev.w-intent.iob\",\"r\").readlines()\n",
    "test = [t[:-1] for t in test]\n",
    "test = [[t.split(\"\\t\")[0].split(\" \"),t.split(\"\\t\")[1].split(\" \")[:-1],t.split(\"\\t\")[1].split(\" \")[-1]] for t in test]\n",
    "test = [[t[0][1:-1],t[1][1:],t[2]] for t in test]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input Sentence :  thanks and what's the last flight back from washington to boston\n",
      "Truth        :  O O O O B-flight_mod O O O B-fromloc.city_name O B-toloc.city_name\n",
      "Prediction :  O O O O B-flight_mod O O O B-fromloc.city_name O B-toloc.city_name\n",
      "Truth        :  atis_flight\n",
      "Prediction :  atis_flight\n"
     ]
    }
   ],
   "source": [
    "index = random.choice(range(len(test)))\n",
    "test_raw = test[index][0]\n",
    "test_in = prepare_sequence(test_raw,word2index)\n",
    "test_mask = Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).view(1,-1)\n",
    "start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*1])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<SOS>']]*1])).transpose(1,0)\n",
    "\n",
    "output, hidden_c = encoder(test_in.unsqueeze(0),test_mask.unsqueeze(0))\n",
    "tag_score, intent_score = decoder(start_decode,hidden_c,output,test_mask)\n",
    "\n",
    "v,i = torch.max(tag_score,1)\n",
    "print(\"Input Sentence : \",*test[index][0])\n",
    "print(\"Truth        : \",*test[index][1])\n",
    "print(\"Prediction : \",*list(map(lambda ii:index2tag[ii],i.data.tolist())))\n",
    "v,i = torch.max(intent_score,1)\n",
    "print(\"Truth        : \",test[index][2])\n",
    "print(\"Prediction : \",index2intent[i.data.tolist()[0]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# TODO "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* LSTM forget gate의 bias 1로 고정\n",
    "* intent decoder의 attention을 독립적인 weight로 구성해보기\n",
    "* log_softmax 안하고 그냥  crossentropy해보기"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
